In [1]:
import obsidian
# Report the installed obsidian version alongside the notebook output.
# The value is interpolated inside the f-string (instead of concatenated with
# '+') so the line also works if __version__ is not a plain str.
print(f'obsidian version: {obsidian.__version__}')

import pandas as pd
import plotly.express as px
import plotly.io as pio
# Set Plotly's default renderer. The '+' joins two renderer names
# ("plotly_mimetype" and "notebook") so figures are emitted for both.
pio.renderers.default = "plotly_mimetype+notebook"
obsidian version: 0.8.0

Set up parameter space and initialize a design

In [2]:
from obsidian import Campaign, Target, ParamSpace, BayesianOptimizer
from obsidian.parameters import Param_Categorical, Param_Ordinal, Param_Continuous
In [3]:
# Continuous factors: name followed by two numeric limits
# (presumably lower and upper bound -- confirm against the obsidian docs).
continuous_params = [
    Param_Continuous('Temperature', -10, 30),
    Param_Continuous('Concentration', 10, 150),
    Param_Continuous('Enzyme', 0.01, 0.30),
]

# Discrete factors: an unordered categorical and an ordinal one, each given
# as a name plus its list of levels.
discrete_params = [
    Param_Categorical('Variant', ['MRK001', 'MRK002', 'MRK003']),
    Param_Ordinal('Stir Rate', ['Low', 'Medium', 'High']),
]

# Same parameter order as the columns of the design table.
params = [*continuous_params, *discrete_params]

# Single response 'Yield' with aim='max'; the campaign is created with seed=0.
X_space = ParamSpace(params)
target = Target('Yield', aim='max')
campaign = Campaign(X_space, target, seed=0)

# Initial design of 10 experiments using the 'LHS' method.
X0 = campaign.initialize(m_initial=10, method='LHS')

X0
Out[3]:
Temperature Concentration Enzyme Variant Stir Rate
0 8.0 17.0 0.1405 MRK001 Medium
1 12.0 143.0 0.1695 MRK003 Medium
2 4.0 101.0 0.2855 MRK002 High
3 28.0 87.0 0.1115 MRK002 Low
4 -4.0 115.0 0.2275 MRK001 Low
5 -8.0 73.0 0.0825 MRK002 Medium
6 20.0 129.0 0.0535 MRK001 High
7 24.0 31.0 0.2565 MRK002 Medium
8 16.0 59.0 0.1985 MRK003 High
9 0.0 45.0 0.0245 MRK003 Low

Collect results (e.g. from a simulation)

In [4]:
from obsidian.experiment import Simulator
from obsidian.experiment.benchmark import shifted_parab

# Wrap the shifted_parab benchmark in a Simulator over the same parameter
# space. name='Yield' is the same string used for the campaign target.
simulator = Simulator(X_space, shifted_parab, name='Yield', eps=0.05)

# Simulate responses for the initial design and attach them column-wise.
y0 = simulator.simulate(X0)
Z0 = pd.concat([X0, y0], axis='columns')

# Register the combined inputs + responses with the campaign.
campaign.add_data(Z0)

# Display five randomly drawn rows of the stored campaign data.
campaign.data.sample(5)
Out[4]:
Temperature Concentration Enzyme Variant Stir Rate Yield Iteration
Observation ID
5 -8.0 73.0 0.0825 MRK002 Medium 86.196333 0
2 4.0 101.0 0.2855 MRK002 High 46.270422 0
3 28.0 87.0 0.1115 MRK002 Low 60.288919 0
4 -4.0 115.0 0.2275 MRK001 Low 63.082417 0
1 12.0 143.0 0.1695 MRK003 Medium 44.280131 0
In [5]:
from obsidian.plotting import visualize_inputs

# Build the input-visualization figure for the campaign and keep a handle to
# it in `fig` (presumably a plot of the experimental inputs -- confirm).
fig = visualize_inputs(campaign)
No description has been provided for this image

Fit the optimizer and visualize results

In [6]:
# Fit the campaign's model to the data added so far; the call logs a
# train-score for the 'Yield' response.
campaign.fit()
GP model has been fit                       to data with a train-score of: 1 for response: Yield
In [7]:
from obsidian.plotting import parity_plot, factor_plot, optim_progress
In [8]:
# Parity plot for the fitted optimizer
# (presumably predicted vs. measured response -- confirm).
parity_plot(campaign.optimizer)
In [9]:
# Factor plot for feature_id=0 (presumably an index into the parameter list,
# i.e. 'Temperature' -- confirm).
factor_plot(campaign.optimizer, feature_id=0)

Optimize new experiment suggestions

In [10]:
# Ask the campaign for a batch of 3 new experiments. Two objects come back:
# the suggested inputs (X_suggest) and accompanying evaluation data
# (eval_suggest).
X_suggest, eval_suggest = campaign.suggest(m_batch=3)
In [11]:
# Put the suggested conditions and their evaluation columns side by side
# for display.
suggestion_frames = [X_suggest, eval_suggest]
df_suggest = pd.concat(suggestion_frames, axis='columns')
df_suggest
Out[11]:
Temperature Concentration Enzyme Variant Stir Rate Yield (pred) Yield lb Yield ub f(Yield) aq Value aq Value (joint) aq Method
0 -10.000000 10.0 0.131014 MRK001 Low 104.655575 97.694464 111.616687 2.349349 -0.380345 -0.292665 NEI
1 -10.000000 10.0 0.135985 MRK001 High 101.870533 94.441592 109.299474 2.195901 -0.612096 -0.292665 NEI
2 -1.227879 10.0 0.114474 MRK002 Low 101.243266 97.004850 105.481684 2.161341 -0.709565 -0.292665 NEI

Collect data at new suggestions

In [12]:
# Simulate the suggested experiments and hold the result as a frame with a
# single 'Yield' column.
y_iter1 = pd.DataFrame(simulator.simulate(X_suggest), columns=['Yield'])

# Combine inputs, simulated response and the suggestion-time evaluation
# columns, then add the rows to the campaign.
iter1_frames = [X_suggest, y_iter1, eval_suggest]
Z_iter1 = pd.concat(iter1_frames, axis='columns')
campaign.add_data(Z_iter1)

# Show the last rows of the campaign data, including the newly added ones.
campaign.data.tail()
Out[12]:
Temperature Concentration Enzyme Variant Stir Rate Yield Iteration Yield (pred) Yield lb Yield ub f(Yield) aq Value aq Value (joint) aq Method Yield (max) (iter)
Observation ID
8 16.000000 59.0 0.198500 MRK003 High 49.876574 0 NaN NaN NaN NaN NaN NaN NaN 92.339807
9 0.000000 45.0 0.024500 MRK003 Low 81.228810 0 NaN NaN NaN NaN NaN NaN NaN 92.339807
10 -10.000000 10.0 0.131014 MRK001 Low 92.196849 1 104.655575 97.694464 111.616687 2.349349 -0.380345 -0.292665 NEI 95.000467
11 -10.000000 10.0 0.135985 MRK001 High 74.499468 1 101.870533 94.441592 109.299474 2.195901 -0.612096 -0.292665 NEI 95.000467
12 -1.227879 10.0 0.114474 MRK002 Low 95.000467 1 101.243266 97.004850 105.481684 2.161341 -0.709565 -0.292665 NEI 95.000467

Repeat as desired

In [13]:
# Run three more rounds of fit -> suggest -> simulate -> record.
# The loop counter is unused. It is named `_round` rather than `iter` so the
# built-in iter() is not shadowed in the notebook namespace, and not bare `_`,
# which IPython reserves for the last cell output.
for _round in range(3):
    # Refit the model on all data collected so far.
    campaign.fit()
    # Request the next batch of 3 experiments.
    X_suggest, eval_suggest = campaign.suggest(m_batch=3)
    # Simulate the responses and add the combined rows to the campaign.
    y_iter = pd.DataFrame(simulator.simulate(X_suggest))
    Z_iter = pd.concat([X_suggest, y_iter, eval_suggest], axis=1)
    campaign.add_data(Z_iter)
GP model has been fit                       to data with a train-score of: 1 for response: Yield
GP model has been fit                       to data with a train-score of: 1 for response: Yield
GP model has been fit                       to data with a train-score of: 1 for response: Yield
In [14]:
# Plot the optimization progress of the campaign, using the 'aq Value'
# column for color_feature_id (presumably colors points by it -- confirm).
optim_progress(campaign, color_feature_id = 'aq Value')

Analyze using Explainer

In [15]:
from obsidian.campaign import Explainer
In [16]:
# Build an Explainer around the campaign's optimizer.
exp = Explainer(campaign.optimizer)
# Run the SHAP explanation with n=500; the progress bar it prints counts
# up to 500.
exp.shap_explain(n=500)
  0%|          | 0/500 [00:00<?, ?it/s]
In [17]:
# Display the explainer's SHAP summary figure (presumably relies on
# shap_explain having been run first -- confirm).
exp.shap_summary()
Out[17]:
No description has been provided for this image
In [18]:
# PDP/ICE figure for ind=0 with ice_color_var=2 (presumably feature indices,
# i.e. 'Temperature' colored by 'Enzyme' -- confirm against obsidian docs).
exp.shap_pdp_ice(ind = 0, ice_color_var = 2)
Out[18]:
No description has been provided for this image